In [1]:
import geopandas as gpd

In [2]:
#bld = gpd.GeoDataFrame.from_file('Building Footprints.geojson')

In [2]:
import csv
import numpy as np
import matplotlib.pyplot as pl
from matplotlib import colors
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import seaborn as sns
%matplotlib inline


from fiona.crs import from_epsg
import pysal as ps
import shapely as shp
import ast
from cStringIO import StringIO
import requests
import os
import json
import urllib
import urllib2

f, ax = pl.subplots(figsize=(55,55))

bld.centroid.plot(linewidth =1, ax = ax, color='b', label = 'tracks')

pl.axis('off')

ticks = ax.set_xticklabels(ax.get_xticklabels(), rotation = 90)

pl.title("NYC Tracks", size=20)


In [7]:
walk= gpd.GeoDataFrame.from_file('WalkingDist.geojson')
walk.crs


Out[7]:
{}

In [4]:
walk.head()


Out[4]:
Walkingtime geometry id
0 15 (POLYGON ((-73.89580509323054 40.6874843827842... 0
1 30 POLYGON ((-73.88281660215223 40.6898960275602,... 1

In [16]:
walk.to_file('WalkingDist')
walk.crs = {'init' :'epsg:4326'}

In [10]:
bld=gpd.GeoDataFrame.from_file('output/Brooklynstreets.shp')

In [11]:
bld.crs


Out[11]:
{u'datum': u'NAD83',
 u'lat_0': 40.16666666666666,
 u'lat_1': 40.66666666666666,
 u'lat_2': 41.03333333333333,
 u'lon_0': -74,
 u'no_defs': True,
 u'proj': u'lcc',
 u'units': u'us-ft',
 u'x_0': 300000,
 u'y_0': 0}

In [17]:
walk.crs


Out[17]:
{'init': 'epsg:4326'}

In [12]:
sta =gpd.GeoDataFrame.from_file('subStations/subStations.shp')
sta.crs


Out[12]:
{}

In [43]:
sta =gpd.GeoDataFrame.from_file('Subway Stations/geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.shp')
sta.crs


Out[43]:
{'init': u'epsg:4326'}

In [45]:
sta.crs_wkt



AttributeErrorTraceback (most recent call last)
<ipython-input-45-bf7857c0597d> in <module>()
----> 1 sta.crs_wkt

C:\Users\ferna\Anaconda2\lib\site-packages\pandas\core\generic.pyc in __getattr__(self, name)
   2670             if name in self._info_axis:
   2671                 return self[name]
-> 2672             return object.__getattribute__(self, name)
   2673 
   2674     def __setattr__(self, name, value):

AttributeError: 'GeoDataFrame' object has no attribute 'crs_wkt'

In [18]:
walk.to_crs({u'datum': u'NAD83',
 u'lat_0': 40.16666666666666,
 u'lat_1': 40.66666666666666,
 u'lat_2': 41.03333333333333,
 u'lon_0': -74,
 u'no_defs': True,
 u'proj': u'lcc',
 u'units': u'us-ft',
 u'x_0': 300000,
 u'y_0': 0})


Out[18]:
Walkingtime geometry id
0 15 (POLYGON ((1013146.666982266 189762.7169624277... 0
1 30 POLYGON ((1016747.624375751 190645.9005413272,... 1

In [20]:
walk.crs


Out[20]:
{'init': 'epsg:4326'}

In [21]:
walk.to_file('therealwalk')

In [32]:
crs = {'init': 'epsg:4326'}

In [38]:
walk = gpd.read_file('WalkingDist.geojson', crs=crs)

In [39]:
walk.head()


Out[39]:
Walkingtime geometry id
0 15 (POLYGON ((-73.89580509323054 40.6874843827842... 0
1 30 POLYGON ((-73.88281660215223 40.6898960275602,... 1

In [40]:
walk.crs


Out[40]:
{}

In [42]:
walk.crs = {'init': 'epsg:4326'}
walk.crs


Out[42]:
{'init': 'epsg:4326'}

In [26]:
gpd.read_file?

In [ ]:
ws = "Subway Stations/"
prj_file =gpd.datasets.get_path('geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4')
sta.crs
prj_file = gpd.datasets.get_path('naturalearth_lowres').replace(".shp",".prj")
prj = [l.strip() for l in open(prj_file,'r')][0]
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
temp_shp = os.path.join(ws,"world_out.shp")
world.to_file(filename=temp_shp,driver='ESRI Shapefile',crs_wkt=prj)

In [48]:
prj_file =gpd.datasets.get_path('geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.shp').replace(".shp",".prj")
prj_file



ValueErrorTraceback (most recent call last)
<ipython-input-48-43fa666ee636> in <module>()
----> 1 prj_file =gpd.datasets.get_path('geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.shp').replace(".shp",".prj")
      2 prj_file

C:\Users\ferna\Anaconda2\lib\site-packages\geopandas\datasets\__init__.pyc in get_path(dataset)
     25     else:
     26         msg = "The dataset '{data}' is not available".format(data=dataset)
---> 27         raise ValueError(msg)

ValueError: The dataset 'geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.shp' is not available

In [62]:
prj = [l.strip() for l in open('Subway Stations/geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.prj','r')][0]

In [63]:
prj


Out[63]:
'GEOGCS["WGS84(DD)", DATUM["WGS84", SPHEROID["WGS84", 6378137.0, 298.257223563]], PRIMEM["Greenwich", 0.0], UNIT["degree", 0.017453292519943295], AXIS["Geodetic longitude", EAST], AXIS["Geodetic latitude", NORTH]]'

In [60]:
walk.to_file(filename='therealwalk2',driver='ESRI Shapefile',crs_wkt=prj)

In [64]:
walk = gpd.read_file('therealwalk2/therealwalk2.shp')
walk.crs


Out[64]:
{'init': u'epsg:4326'}

In [65]:
nei = gpd.read_file('../Class_7/NYCneighbourhoods/NYCneighbourhoods.shp')
nei.crs


Out[65]:
{}

In [66]:
nei.to_file(filename='NYCNeighbourhoods',driver='ESRI Shapefile',crs_wkt=prj)

In [3]:
import pandas as pd
import numpy as np
import geopandas as gd
import matplotlib.pylab as pl
from fiona.crs import from_epsg
import fiona
from matplotlib import colors
import shapely
import seaborn as sns
%matplotlib inline

In [23]:
import requests
import time

url = "https://api.yelp.com/v3/businesses/search"
def yelp(x,y,r):
    querystring = {"latitude":str(x),"longitude":str(y),"radius":str(r),"limit":str(50)}
    headers = {
        'authorization': "Bearer BLFZLp20YRlPJFKs6htUY824brFnUTfsbdsYapxcBoBYXTnOYjwFaH1wr0BK7i6lMbLZlVvAFgG3OE6MAQY849BTw1arAuHGevfHh6TRCsVwMnG2GXqejJTWRzErWXYx",
        'cache-control': "no-cache",
        'postman-token': "e903653d-085f-354b-96fb-19691e66f5ba"
        }
    time.sleep(.25)
    response = requests.request("GET", url, headers=headers, params=querystring)
    return response.json()
    #print(response.text)

In [5]:
sta =gpd.GeoDataFrame.from_file('Subway Stations/geo_export_5e33d456-40a6-4bc5-83f8-d922a76657e4.shp')

In [6]:
sta.head()


Out[6]:
geometry line name notes objectid url
0 POINT (-73.99106999861967 40.73005400028978) 4-6-6 Express Astor Pl 4 nights, 6-all times, 6 Express-weekdays AM s... 1.0 http://web.mta.info/nyct/service/
1 POINT (-74.00019299927328 40.71880300107709) 4-6-6 Express Canal St 4 nights, 6-all times, 6 Express-weekdays AM s... 2.0 http://web.mta.info/nyct/service/
2 POINT (-73.98384899986625 40.76172799961419) 1-2 50th St 1-all times, 2-nights 3.0 http://web.mta.info/nyct/service/
3 POINT (-73.97499915116808 40.68086213682956) 2-3-4 Bergen St 4-nights, 3-all other times, 2-all times 4.0 http://web.mta.info/nyct/service/
4 POINT (-73.89488591154061 40.66471445143568) 3-4 Pennsylvania Ave 4-nights, 3-all other times 5.0 http://web.mta.info/nyct/service/

In [7]:
stal = sta[sta.line=='L']
stal


Out[7]:
geometry line name notes objectid url
71 POINT (-73.95024799996972 40.71407200064717) L Lorimer St L-all times 72.0 http://web.mta.info/nyct/service/
72 POINT (-73.9019160004208 40.66914500061398) L Sutter Ave L-all times 73.0 http://web.mta.info/nyct/service/
73 POINT (-73.90395860491864 40.68886654246024) L Wilson Ave L-all times 74.0 http://web.mta.info/nyct/service/
145 POINT (-73.98168087489128 40.73097497580066) L 1st Ave L-all times 146.0 http://web.mta.info/nyct/service/
148 POINT (-73.94049699874644 40.71157600064823) L Grand St L-all times 149.0 http://web.mta.info/nyct/service/
149 POINT (-73.94394399869037 40.71457599836364) L Graham Ave L-all times 150.0 http://web.mta.info/nyct/service/
150 POINT (-73.95666499806525 40.71717399858899) L Bedford Ave L-all times 151.0 http://web.mta.info/nyct/service/
151 POINT (-73.93979284713505 40.70739106438455) L Montrose Ave L-all times 152.0 http://web.mta.info/nyct/service/
193 POINT (-73.9030969995401 40.67534466640805) L Atlantic Ave L-all times 194.0 http://web.mta.info/nyct/service/
210 POINT (-73.90393400118632 40.69551800114878) L Halsey St L-all times 211.0 http://web.mta.info/nyct/service/
211 POINT (-73.91097571826469 40.69947106242714) L Myrtle - Wyckoff Aves L-all times 212.0 http://web.mta.info/nyct/service/
215 POINT (-73.90056237226057 40.66405727094644) L Livonia Ave L-all times 216.0 http://web.mta.info/nyct/service/
218 POINT (-73.90185000017287 40.64665366739528) L Canarsie - Rockaway Pkwy L-all times 219.0 http://web.mta.info/nyct/service/
219 POINT (-73.89954769388724 40.65046878544699) L E 105th St L-all times 220.0 http://web.mta.info/nyct/service/
222 POINT (-73.89927796057143 40.65891477368527) L New Lots Ave L-all times 223.0 http://web.mta.info/nyct/service/
234 POINT (-73.91823200219723 40.70369299961644) L DeKalb Ave L-all times 235.0 http://web.mta.info/nyct/service/
281 POINT (-73.90526176305106 40.68286062551184) L Bushwick - Aberdeen L-all times 282.0 http://web.mta.info/nyct/service/
282 POINT (-73.90311757920684 40.67845624842869) L Broadway Junction L-all times 283.0 http://web.mta.info/nyct/service/
304 POINT (-73.9229130000312 40.70660666598872) L Jefferson St L-all times 305.0 http://web.mta.info/nyct/service/
305 POINT (-73.93314700024209 40.70615166680729) L Morgan Ave L-all times 306.0 http://web.mta.info/nyct/service/
382 POINT (-73.98575000112093 40.73269099971662) L 3rd Ave L-all times 383.0 http://web.mta.info/nyct/service/
383 POINT (-73.99066976901818 40.73476331217923) L Union Sq - 14th St L-all times 384.0 http://web.mta.info/nyct/service/
441 POINT (-73.99775078874781 40.73774146981052) L 6th Ave L-all times 442.0 http://web.mta.info/nyct/service/
442 POINT (-74.00257800104762 40.73977666638199) L 8th Ave L-all times 443.0 http://web.mta.info/nyct/service/

In [48]:
cloud= gpd.GeoDataFrame.from_file('output/pointcloud.shp')
cloud.crs
cloud.head()


Out[48]:
Id OBJECTID geometry
0 0 1 POINT (1011383.683253422 172031.4200946689)
1 0 2 POINT (1011833.683337674 172031.4200946689)
2 0 3 POINT (1012283.683421925 172031.4200946689)
3 0 4 POINT (1012733.683178097 172031.4200946689)
4 0 5 POINT (1013183.683262333 172031.4200946689)

In [50]:
cloud = cloud.to_crs(epsg=4326)

In [51]:
cloud.plot()


Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x153b7160>

In [54]:
cloud.head()


Out[54]:
Id OBJECTID geometry
0 0 1 POINT (-73.90223336955297 40.638821508269)
1 0 2 POINT (-73.90061195479413 40.63882011829155)
2 0 3 POINT (-73.89899054010338 40.63881870545156)
3 0 4 POINT (-73.89736912666398 40.63881726975008)
4 0 5 POINT (-73.89574771211282 40.63881581118501)

In [8]:
stal.drop([442,441,383,382,145], inplace=True)


C:\Users\ferna\Anaconda2\lib\site-packages\ipykernel\__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [9]:
#f, ax = pl.subplots(figsize=(55,55))

stal.plot()
#pl.axis('off')
#ticks = ax.set_xticklabels(ax.get_xticklabels(), rotation = 90)


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x13f67390>

In [24]:
stal.geometry[71].y


Out[24]:
40.71407200064717

In [73]:
columns = [
'yelp_lat',
'yelp_long',
'yelp_distance',
'yelp_name',
'yelp_street',
'yelp_city',
'yelp_state',
'yelp_zip_code',
'yelp_cat1',
'yelp_cat2',
]
index = pd.Series(range(stal.shape[0]*3457))
df = pd.DataFrame(index=None, columns=columns)
df = df.fillna(0)
df.head()


Out[73]:
yelp_lat yelp_long yelp_distance yelp_name yelp_street yelp_city yelp_state yelp_zip_code yelp_cat1 yelp_cat2

In [74]:
def getx(point):
    return point.x

def gety(point):
    return point.y

In [75]:
cloud.shape


Out[75]:
(1481, 3)

In [76]:
addressold = ''
old_index = np.nan
count=0
#customer_iter = iter(Customer_Info.index)
yelp_cat1 =[]
yelp_cat2 =[]
yelp_lat=[]
yelp_long=[]
yelp_distance=[]
yelp_name=[]
yelp_street=[]
yelp_city=[]
yelp_state=[]
yelp_zip_code=[]

for i in cloud.index:
    print i,count
    r_json = yelp(gety(cloud.ix[i,'geometry']), getx(cloud.ix[i,'geometry']), 100)
    print len(r_json['businesses'])#, r_json['businesses']
    if len(r_json['businesses'])>0:
        for j in range(len(r_json['businesses'])):
            if r_json['businesses'][j][u'categories']!=[]:
                yelp_cat1.append(r_json['businesses'][j][u'categories'][0][u'alias'])
                yelp_cat2.append(r_json['businesses'][j][u'categories'][0][u'title'])
                yelp_lat.append(r_json['businesses'][j][u'coordinates'][u'latitude'])
                yelp_long.append(r_json['businesses'][j][u'coordinates'][u'longitude'])
                yelp_distance.append(r_json['businesses'][j][u'distance'])
                yelp_name.append(r_json['businesses'][j]['name'])
                yelp_street.append(r_json['businesses'][j][u'location'][u'address1']) #Street
                yelp_city.append(r_json['businesses'][j][u'location']['city'])
                yelp_state.append(r_json['businesses'][j][u'location']['state'])
                yelp_zip_code.append(r_json['businesses'][j][u'location'][u'zip_code'])

        
    count+=1
    if count==2000:
        break
df.yelp_cat1=yelp_cat1
df.yelp_cat2=yelp_cat2
df.yelp_lat=yelp_lat
df.yelp_long=yelp_long
df.yelp_distance=yelp_distance
df.yelp_name=yelp_name
df.yelp_street=yelp_street
df.yelp_city=yelp_city
df.yelp_state=yelp_state
df.yelp_zip_code=yelp_zip_code


0 0
0
1 1
0
2 2
0
3 3
5
4 4
22
5 5
16
6 6
0
7 7
6
8 8
8
9 9
10
10 10
2
11 11
0
12 12
0
13 13
0
14 14
1
15 15
1
16 16
3
17 17
0
18 18
0
19 19
3
20 20
3
21 21
24
22 22
21
23 23
0
24 24
1
25 25
5
26 26
0
27 27
0
28 28
1
29 29
0
30 30
0
31 31
0
32 32
0
33 33
0
34 34
2
35 35
1
36 36
19
37 37
21
38 38
5
39 39
11
40 40
6
41 41
7
42 42
0
43 43
0
44 44
0
45 45
0
46 46
0
47 47
2
48 48
3
49 49
0
50 50
1
51 51
1
52 52
0
53 53
14
54 54
16
55 55
1
56 56
7
57 57
0
58 58
0
59 59
0
60 60
0
61 61
0
62 62
0
63 63
4
64 64
5
65 65
0
66 66
1
67 67
1
68 68
4
69 69
25
70 70
6
71 71
1
72 72
2
73 73
1
74 74
0
75 75
0
76 76
0
77 77
0
78 78
0
79 79
2
80 80
1
81 81
0
82 82
1
83 83
1
84 84
10
85 85
11
86 86
2
87 87
0
88 88
3
89 89
4
90 90
1
91 91
0
92 92
0
93 93
1
94 94
1
95 95
1
96 96
0
97 97
2
98 98
3
99 99
6
100 100
0
101 101
1
102 102
2
103 103
3
104 104
3
105 105
2
106 106
4
107 107
0
108 108
5
109 109
1
110 110
1
111 111
2
112 112
6
113 113
8
114 114
1
115 115
2
116 116
3
117 117
2
118 118
1
119 119
1
120 120
5
121 121
5
122 122
1
123 123
1
124 124
3
125 125
12
126 126
2
127 127
2
128 128
2
129 129
0
130 130
1
131 131
0
132 132
1
133 133
10
134 134
6
135 135
0
136 136
5
137 137
2
138 138
0
139 139
3
140 140
7
141 141
2
142 142
1
143 143
1
144 144
0
145 145
0
146 146
0
147 147
0
148 148
9
149 149
10
150 150
1
151 151
3
152 152
3
153 153
3
154 154
4
155 155
1
156 156
0
157 157
0
158 158
0
159 159
0
160 160
0
161 161
0
162 162
1
163 163
3
164 164
9
165 165
2
166 166
4
167 167
0
168 168
0
169 169
0
170 170
0
171 171
0
172 172
0
173 173
0
174 174
0
175 175
0
176 176
1
177 177
0
178 178
15
179 179
6
180 180
2
181 181
3
182 182
2
183 183
0
184 184
1
185 185
1
186 186
0
187 187
0
188 188
3
189 189
0
190 190
0
191 191
0
192 192
1
193 193
4
194 194
24
195 195
7
196 196
1
197 197
3
198 198
3
199 199
1
200 200
2
201 201
2
202 202
0
203 203
0
204 204
5
205 205
1
206 206
3
207 207
1
208 208
2
209 209
9
210 210
13
211 211
0
212 212
0
213 213
2
214 214
2
215 215
1
216 216
1
217 217
1
218 218
2
219 219
2
220 220
1
221 221
3
222 222
3
223 223
3
224 224
6
225 225
16
226 226
1
227 227
1
228 228
0
229 229
3
230 230
0
231 231
0
232 232
0
233 233
0
234 234
2
235 235
2
236 236
0
237 237
2
238 238
0
239 239
1
240 240
6
241 241
10
242 242
1
243 243
2
244 244
2
245 245
0
246 246
0
247 247
0
248 248
1
249 249
0
250 250
0
251 251
0
252 252
0
253 253
0
254 254
0
255 255
1
256 256
0
257 257
0
258 258
3
259 259
0
260 260
0
261 261
2
262 262
1
263 263
0
264 264
0
265 265
0
266 266
1
267 267
1
268 268
0
269 269
0
270 270
0
271 271
4
272 272
7
273 273
1
274 274
1
275 275
0
276 276
2
277 277
1
278 278
0
279 279
1
280 280
3
281 281
0
282 282
1
283 283
0
284 284
1
285 285
1
286 286
1
287 287
1
288 288
0
289 289
6
290 290
11
291 291
4
292 292
2
293 293
0
294 294
0
295 295
1
296 296
3
297 297
3
298 298
0
299 299
3
300 300
1
301 301
2
302 302
0
303 303
2
304 304
2
305 305
0
306 306
3
307 307
3
308 308
3
309 309
2
310 310
5
311 311
1
312 312
1
313 313
5
314 314
0
315 315
0
316 316
3
317 317
1
318 318
4
319 319
1
320 320
1
321 321
1
322 322
0
323 323
5
324 324
20
325 325
1
326 326
3
327 327
7
328 328
3
329 329
1
330 330
1
331 331
3
332 332
0
333 333
0
334 334
1
335 335
0
336 336
2
337 337
1
338 338
0
339 339
0
340 340
0
341 341
4
342 342
20
343 343
1
344 344
3
345 345
1
346 346
3
347 347
0
348 348
0
349 349
4
350 350
6
351 351
0
352 352
4
353 353
1
354 354
0
355 355
3
356 356
1
357 357
1
358 358
0
359 359
1
360 360
3
361 361
0
362 362
0
363 363
1
364 364
1
365 365
1
366 366
6
367 367
11
368 368
17
369 369
5
370 370
11
371 371
1
372 372
1
373 373
3
374 374
1
375 375
4
376 376
3
377 377
5
378 378
4
379 379
0
380 380
1
381 381
1
382 382
1
383 383
7
384 384
6
385 385
13
386 386
20
387 387
10
388 388
9
389 389
1
390 390
3
391 391
1
392 392
1
393 393
3
394 394
3
395 395
4
396 396
1
397 397
0
398 398
2
399 399
1
400 400
0
401 401
1
402 402
1
403 403
7
404 404
2
405 405
12
406 406
6
407 407
2
408 408
0
409 409
3
410 410
0
411 411
2
412 412
2
413 413
1
414 414
3
415 415
1
416 416
0
417 417
1
418 418
0
419 419
0
420 420
1
421 421
0
422 422
6
423 423
2
424 424
4
425 425
1
426 426
2
427 427
1
428 428
4
429 429
0
430 430
1
431 431
2
432 432
1
433 433
4
434 434
2
435 435
2
436 436
8
437 437
3
438 438
2
439 439
1
440 440
1
441 441
0
442 442
2
443 443
6
444 444
3
445 445
6
446 446
1
447 447
3
448 448
2
449 449
1
450 450
0
451 451
2
452 452
2
453 453
2
454 454
3
455 455
8
456 456
4
457 457
0
458 458
1
459 459
0
460 460
1
461 461
4
462 462
3
463 463
6
464 464
1
465 465
0
466 466
2
467 467
4
468 468
4
469 469
7
470 470
6
471 471
3
472 472
4
473 473
3
474 474
5
475 475
2
476 476
2
477 477
4
478 478
2
479 479
1
480 480
2
481 481
3
482 482
1
483 483
1
484 484
1
485 485
9
486 486
9
487 487
11
488 488
7
489 489
6
490 490
9
491 491
10
492 492
8
493 493
2
494 494
2
495 495
5
496 496
2
497 497
7
498 498
4
499 499
0
500 500
2
501 501
1
502 502
2
503 503
8
504 504
7
505 505
11
506 506
4
507 507
7
508 508
7
509 509
11
510 510
5
511 511
8
512 512
0
513 513
3
514 514
1
515 515
11
516 516
3
517 517
1
518 518
2
519 519
2
520 520
2
521 521
2
522 522
3
523 523
3
524 524
6
525 525
5
526 526
2
527 527
4
528 528
6
529 529
12
530 530
0
531 531
0
532 532
2
533 533
4
534 534
5
535 535
0
536 536
3
537 537
9
538 538
8
539 539
2
540 540
0
541 541
1
542 542
0
543 543
5
544 544
2
545 545
4
546 546
0
547 547
0
548 548
0
549 549
0
550 550
2
551 551
4
552 552
1
553 553
1
554 554
4
555 555
7
556 556
7
557 557
2
558 558
0
559 559
0
560 560
0
561 561
0
562 562
0
563 563
3
564 564
4
565 565
1
566 566
3
567 567
4
568 568
8
569 569
3
570 570
3
571 571
5
572 572
3
573 573
0
574 574
0
575 575
0
576 576
0
577 577
1
578 578
0
579 579
0
580 580
7
581 581
2
582 582
3
583 583
9
584 584
7
585 585
3
586 586
4
587 587
2
588 588
0
589 589
0
590 590
0
591 591
0
592 592
0
593 593
1
594 594
0
595 595
0
596 596
3
597 597
10
598 598
8
599 599
9
600 600
2
601 601
3
602 602
4
603 603
0
604 604
0
605 605
0
606 606
0
607 607
0
608 608
0
609 609
0
610 610
11
611 611
14
612 612
8
613 613
3
614 614
1
615 615
3
616 616
1
617 617
0
618 618
0
619 619
0
620 620
0
621 621
0
622 622
0
623 623
0
624 624
15
625 625
6
626 626
3
627 627
1
628 628
1
629 629
2
630 630
1
631 631
0
632 632
0
633 633
0
634 634
0
635 635
0
636 636
0
637 637
0
638 638
8
639 639
0
640 640
1
641 641
1
642 642
1
643 643
2
644 644
1
645 645
4
646 646
0
647 647
0
648 648
0
649 649
0
650 650
0
651 651
5
652 652
1
653 653
1
654 654
1
655 655
0
656 656
4
657 657
6
658 658
6
659 659
0
660 660
0
661 661
0
662 662
0
663 663
0
664 664
0
665 665
2
666 666
2
667 667
2
668 668
0
669 669
0
670 670
3
671 671
10
672 672
5
673 673
5
674 674
2
675 675
2
676 676
0
677 677
0
678 678
0
679 679
0
680 680
0
681 681
1
682 682
1
683 683
2
684 684
3
685 685
0
686 686
3
687 687
3
688 688
8
689 689
2
690 690
4
691 691
4
692 692
9
693 693
0
694 694
0
695 695
1
696 696
4
697 697
2
698 698
1
699 699
0
700 700
6
701 701
0
702 702
8
703 703
1
704 704
2
705 705
4
706 706
4
707 707
7
708 708
1
709 709
4
710 710
1
711 711
2
712 712
11
713 713
2
714 714
4
715 715
2
716 716
4
717 717
2
718 718
0
719 719
0
720 720
2
721 721
6
722 722
3
723 723
7
724 724
10
725 725
2
726 726
1
727 727
4
728 728
6
729 729
13
730 730
2
731 731
5
732 732
4
733 733
5
734 734
8
735 735
5
736 736
5
737 737
5
738 738
1
739 739
0
740 740
0
741 741
0
742 742
2
743 743
0
744 744
1
745 745
0
746 746
38
747 747
1
748 748
2
749 749
2
750 750
4
751 751
6
752 752
10
753 753
6
754 754
5
755 755
17
756 756
12
757 757
2
758 758
8
759 759
6
760 760
5
761 761
2
762 762
1
763 763
0
764 764
0
765 765
36
766 766
20
767 767
13
768 768
5
769 769
10
770 770
6
771 771
3
772 772
5
773 773
5
774 774
35
775 775
1
776 776
0
777 777
2
778 778
3
779 779
6
780 780
5
781 781
10
782 782
23
783 783
20
784 784
6
785 785
4
786 786
5
787 787
6
788 788
2
789 789
1
790 790
2
791 791
0
792 792
0
793 793
1
794 794
0
795 795
16
796 796
20
797 797
21
798 798
29
799 799
16
800 800
16
801 801
5
802 802
7
803 803
13
804 804
11
805 805
7
806 806
15
807 807
14
808 808
21
809 809
17
810 810
12
811 811
17
812 812
11
813 813
12
814 814
12
815 815
22
816 816
8
817 817
7
818 818
5
819 819
1
820 820
3
821 821
2
822 822
20
823 823
21
824 824
1
825 825
1
826 826
3
827 827
11
828 828
18
829 829
16
830 830
4
831 831
9
832 832
7
833 833
2
834 834
7
835 835
8
836 836
13
837 837
21
838 838
10
839 839
20
840 840
30
841 841
24
842 842
28
843 843
19
844 844
33
845 845
33
846 846
39
847 847
30
848 848
29
849 849
22
850 850
19
851 851
31
852 852
28
853 853
14
854 854
8
855 855
3
856 856
1
857 857
0
858 858
3
859 859
16
860 860
35
861 861
30
862 862
3
863 863
5
864 864
7
865 865
0
866 866
2
867 867
0
868 868
10
869 869
8
870 870
9
871 871
17
872 872
27
873 873
35
874 874
22
875 875
6
876 876
12
877 877
14
878 878
19
879 879
29
880 880
36
881 881
28
882 882
29
883 883
32
884 884
28
885 885
36
886 886
36
887 887
27
888 888
20
889 889
29
890 890
16
891 891
13
892 892
3
893 893
3
894 894
28
895 895
50
896 896
22
897 897
2
898 898
5
899 899
12
900 900
0
901 901
7
902 902
0
903 903
11
904 904
8
905 905
15
906 906
26
907 907
26
908 908
46
909 909
12
910 910
11
911 911
19
912 912
11
913 913
12
914 914
5
915 915
12
916 916
15
917 917
15
918 918
17
919 919
50
920 920
14
921 921
24
922 922
15
923 923
13
924 924
50
925 925
17
926 926
17
927 927
3
928 928
3
929 929
5
930 930
16
931 931
50
932 932
41
933 933
0
934 934
8
935 935
5
936 936
7
937 937
24
938 938
17
939 939
18
940 940
14
941 941
33
942 942
28
943 943
5
944 944
29
945 945
10
946 946
35
947 947
22
948 948
7
949 949
5
950 950
3
951 951
9
952 952
19
953 953
15
954 954
4
955 955
50
956 956
11
957 957
16
958 958
4
959 959
3
960 960
50
961 961
19
962 962
6
963 963
0
964 964
2
965 965
6
966 966
4
967 967
3
968 968
7
969 969
50
970 970
50
971 971
4
972 972
6
973 973
0
974 974
10
975 975
36
976 976
20
977 977
24
978 978
36
979 979
24
980 980
13
981 981
11
982 982
31
983 983
19
984 984
37
985 985
10
986 986
6
987 987
5
988 988
6
989 989
15
990 990
9
991 991
8
992 992
2
993 993
10
994 994
27
995 995
3
996 996
4
997 997
9
998 998
31
999 999
2
1000 1000
2
1001 1001
6
1002 1002
7
1003 1003
16
1004 1004
7
1005 1005
4
1006 1006
1
1007 1007
10
1008 1008
50
1009 1009
35
1010 1010
8
1011 1011
7
1012 1012
5
1013 1013
8
1014 1014
36
1015 1015
12
1016 1016
15
1017 1017
36
1018 1018
5
1019 1019
14
1020 1020
25
1021 1021
28
1022 1022
15
1023 1023
12
1024 1024
5
1025 1025
8
1026 1026
4
1027 1027
6
1028 1028
8
1029 1029
6
1030 1030
7
1031 1031
50
1032 1032
50
1033 1033
27
1034 1034
2
1035 1035
2
1036 1036
3
1037 1037
6
1038 1038
11
1039 1039
20
1040 1040
10
1041 1041
30
1042 1042
14
1043 1043
6
1044 1044
7
1045 1045
14
1046 1046
29
1047 1047
14
1048 1048
21
1049 1049
6
1050 1050
4
1051 1051
5
1052 1052
21
1053 1053
9
1054 1054
9
1055 1055
10
1056 1056
2
1057 1057
11
1058 1058
28
1059 1059
35
1060 1060
3
1061 1061
3
1062 1062
0
1063 1063
5
1064 1064
7
1065 1065
5
1066 1066
2
1067 1067
6
1068 1068
3
1069 1069
50
1070 1070
50
1071 1071
6
1072 1072
1
1073 1073
10
1074 1074
21
1075 1075
34
1076 1076
26
1077 1077
13
1078 1078
22
1079 1079
8
1080 1080
17
1081 1081
11
1082 1082
8
1083 1083
10
1084 1084
16
1085 1085
20
1086 1086
19
1087 1087
25
1088 1088
11
1089 1089
5
1090 1090
0
1091 1091
9
1092 1092
4
1093 1093
9
1094 1094
2
1095 1095
1
1096 1096
10
1097 1097
18
1098 1098
22
1099 1099
4
1100 1100
0
1101 1101
0
1102 1102
2
1103 1103
5
1104 1104
4
1105 1105
3
1106 1106
3
1107 1107
2
1108 1108
4
1109 1109
10
1110 1110
0
1111 1111
0
1112 1112
19
1113 1113
10
1114 1114
20
1115 1115
42
1116 1116
50
1117 1117
50
1118 1118
9
1119 1119
9
1120 1120
7
1121 1121
13
1122 1122
11
1123 1123
4
1124 1124
7
1125 1125
6
1126 1126
8
1127 1127
10
1128 1128
12
1129 1129
11
1130 1130
5
1131 1131
4
1132 1132
2
1133 1133
1
1134 1134
0
1135 1135
1
1136 1136
5
1137 1137
6
1138 1138
4
1139 1139
6
1140 1140
0
1141 1141
0
1142 1142
0
1143 1143
2
1144 1144
2
1145 1145
3
1146 1146
0
1147 1147
3
1148 1148
4
1149 1149
1
1150 1150
35
1151 1151
33
1152 1152
24
1153 1153
45
1154 1154
50
1155 1155
50
1156 1156
50
1157 1157
16
1158 1158
19
1159 1159
41
1160 1160
38
1161 1161
36
1162 1162
36
1163 1163
37
1164 1164
18
1165 1165
12
1166 1166
3
1167 1167
4
1168 1168
20
1169 1169
17
1170 1170
7
1171 1171
0
1172 1172
1
1173 1173
0
1174 1174
0
1175 1175
2
1176 1176
0
1177 1177
7
1178 1178
0
1179 1179
0
1180 1180
0
1181 1181
1
1182 1182
0
1183 1183
2
1184 1184
2
1185 1185
2
1186 1186
49
1187 1187
27
1188 1188
30
1189 1189
47
1190 1190
50
1191 1191
50
1192 1192
16
1193 1193
13
1194 1194
24
1195 1195
43
1196 1196
39
1197 1197
33
1198 1198
38
1199 1199
50
1200 1200
28
1201 1201
31
1202 1202
3
1203 1203
6
1204 1204
19
1205 1205
26
1206 1206
5
1207 1207
0
1208 1208
0
1209 1209
1
1210 1210
1
1211 1211
0
1212 1212
1
1213 1213
1
1214 1214
4
1215 1215
2
1216 1216
0
1217 1217
0
1218 1218
0
1219 1219
0
1220 1220
1
1221 1221
1
1222 1222
35
1223 1223
13
1224 1224
42
1225 1225
22
1226 1226
45
1227 1227
50
1228 1228
39
1229 1229
3
1230 1230
19
1231 1231
29
1232 1232
32
1233 1233
9
1234 1234
6
1235 1235
37
1236 1236
15
1237 1237
20
1238 1238
6
1239 1239
5
1240 1240
11
1241 1241
23
1242 1242
3
1243 1243
4
1244 1244
0
1245 1245
1
1246 1246
1
1247 1247
2
1248 1248
1
1249 1249
4
1250 1250
3
1251 1251
6
1252 1252
4
1253 1253
3
1254 1254
5
1255 1255
5
1256 1256
4
1257 1257
17
1258 1258
9
1259 1259
50
1260 1260
50
1261 1261
50
1262 1262
35
1263 1263
40
1264 1264
50
1265 1265
17
1266 1266
39
1267 1267
39
1268 1268
12
1269 1269
12
1270 1270
50
1271 1271
16
1272 1272
9
1273 1273
6
1274 1274
4
1275 1275
8
1276 1276
13
1277 1277
4
1278 1278
2
1279 1279
4
1280 1280
1
1281 1281
3
1282 1282
1
1283 1283
2
1284 1284
1
1285 1285
5
1286 1286
1
1287 1287
5
1288 1288
6
1289 1289
10
1290 1290
5
1291 1291
3
1292 1292
25
1293 1293
17
1294 1294
47
1295 1295
50
1296 1296
50
1297 1297
22
1298 1298
17
1299 1299
50
1300 1300
14
1301 1301
18
1302 1302
20
1303 1303
11
1304 1304
11
1305 1305
50
1306 1306
19
1307 1307
11
1308 1308
9
1309 1309
6
1310 1310
4
1311 1311
7
1312 1312
2
1313 1313
1
1314 1314
3
1315 1315
4
1316 1316
0
1317 1317
0
1318 1318
1
1319 1319
0
1320 1320
3
1321 1321
8
1322 1322
2
1323 1323
2
1324 1324
46
1325 1325
27
1326 1326
47
1327 1327
29
1328 1328
50
1329 1329
50
1330 1330
26
1331 1331
9
1332 1332
17
1333 1333
14
1334 1334
13
1335 1335
9
1336 1336
15
1337 1337
37
1338 1338
6
1339 1339
9
1340 1340
6
1341 1341
2
1342 1342
3
1343 1343
6
1344 1344
0
1345 1345
0
1346 1346
2
1347 1347
2
1348 1348
43
1349 1349
35
1350 1350
34
1351 1351
50
1352 1352
50
1353 1353
50
1354 1354
50
1355 1355
18
1356 1356
17
1357 1357
11
1358 1358
15
1359 1359
15
1360 1360
22
1361 1361
32
1362 1362
7
1363 1363
8
1364 1364
0
1365 1365
0
1366 1366
2
1367 1367
2
1368 1368
0
1369 1369
0
1370 1370
1
1371 1371
32
1372 1372
45
1373 1373
50
1374 1374
47
1375 1375
48
1376 1376
50
1377 1377
21
1378 1378
10
1379 1379
7
1380 1380
13
1381 1381
15
1382 1382
14
1383 1383
9
1384 1384
10
1385 1385
7
1386 1386
5
1387 1387
2
1388 1388
1
1389 1389
0
1390 1390
0
1391 1391
0
1392 1392
0
1393 1393
15
1394 1394
50
1395 1395
28
1396 1396
11
1397 1397
17
1398 1398
33
1399 1399
8
1400 1400
5
1401 1401
4
1402 1402
5
1403 1403
3
1404 1404
5
1405 1405
6
1406 1406
6
1407 1407
7
1408 1408
9
1409 1409
6
1410 1410
0
1411 1411
0
1412 1412
0
1413 1413
46
1414 1414
14
1415 1415
15
1416 1416
23
1417 1417
19
1418 1418
4
1419 1419
1
1420 1420
12
1421 1421
22
1422 1422
13
1423 1423
12
1424 1424
5
1425 1425
5
1426 1426
15
1427 1427
9
1428 1428
7
1429 1429
0
1430 1430
1
1431 1431
2
1432 1432
1
1433 1433
13
1434 1434
15
1435 1435
17
1436 1436
13
1437 1437
25
1438 1438
50
1439 1439
25
1440 1440
15
1441 1441
20
1442 1442
16
1443 1443
5
1444 1444
14
1445 1445
10
1446 1446
4
1447 1447
3
1448 1448
1
1449 1449
0
1450 1450
1
1451 1451
4
1452 1452
11
1453 1453
17
1454 1454
41
1455 1455
50
1456 1456
34
1457 1457
25
1458 1458
23
1459 1459
15
1460 1460
6
1461 1461
7
1462 1462
6
1463 1463
3
1464 1464
5
1465 1465
7
1466 1466
8
1467 1467
8
1468 1468
44
1469 1469
50
1470 1470
29
1471 1471
28
1472 1472
28
1473 1473
11
1474 1474
11
1475 1475
11
1476 1476
7
1477 1477
10
1478 1478
50
1479 1479
50
1480 1480
7

In [77]:
df.shape[0]


Out[77]:
12632

In [78]:
df.drop_duplicates().shape[0]


Out[78]:
12622

In [79]:
df.head()


Out[79]:
yelp_lat yelp_long yelp_distance yelp_name yelp_street yelp_city yelp_state yelp_zip_code yelp_cat1 yelp_cat2
0 40.637911 -73.896706 110.587118 Bamboo Garden 9505 Avenue L Brooklyn NY 11236 caribbean Caribbean
1 40.637718 -73.897255 124.609490 Lin Wong Restaurant 9417 Ave L Brooklyn NY 11236 chinese Chinese
2 40.637550 -73.897310 131.084155 Chloe's Restaurant & Lounge 9413 Ave L Brooklyn NY 11236 haitian Haitian
3 40.639790 -73.896881 115.517781 Rockaway Group Family Daycare 1696 Rockaway Pkwy Brooklyn NY 11236 childcare Child Care & Day Care
4 40.637753 -73.896623 128.225645 Canarsie's New Look 9510 Avenue L Brooklyn NY 11236 hair Hair Salons

In [80]:
df.drop_duplicates().shape[0]


Out[80]:
12622

In [82]:
df1 = df.drop_duplicates()

In [84]:
df1.to_csv('output/business.csv', encoding='utf-8')

In [85]:
#re in a conda env: conda install -c conda-forge imageio
!pip install imageio
#Good old python setup.py install


Collecting imageio
  Downloading imageio-2.2.0.tar.gz (3.3MB)
Requirement already satisfied: numpy in c:\python27\lib\site-packages (from imageio)
Collecting pillow (from imageio)
  Downloading Pillow-4.1.1-cp27-cp27m-win32.whl (1.2MB)
Collecting olefile (from pillow->imageio)
  Downloading olefile-0.44.zip (74kB)
Installing collected packages: olefile, pillow, imageio
  Running setup.py install for olefile: started
    Running setup.py install for olefile: finished with status 'done'
  Running setup.py install for imageio: started
    Running setup.py install for imageio: finished with status 'done'
Successfully installed imageio-2.2.0 olefile-0.44 pillow-4.1.1

In [1]:
from PIL import Image

In [3]:
img = Image.open("Data/2-0normal.gif")

In [4]:
img2 = img.crop((0, 0, 100, 100))
#img2.save("img2.jpg")

In [ ]:
re in a conda env: conda install -c conda-forge imageio
If you have pip: pip install imageio
Good old python setup.py install